import pandas as pd
import numpy as np
from IPython.core.display import display
import sys
sys.path.append('/Users/kyohei/code/myPackages')
import stock
from tqdm import tqdm
import math
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'AppleGothic'
%matplotlib inline
from IPython.core.display import display
import plotly
from plotly import tools
import plotly.graph_objs as go
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=False)
from pypfopt import expected_returns
from pypfopt import risk_models
from pypfopt.efficient_frontier import EfficientFrontier
from tslearn.utils import to_time_series_dataset
from tslearn.clustering import KShape
import pickle
def pickle_dump(obj, path):
with open(path, mode='wb') as f:
pickle.dump(obj,f)
def pickle_load(path):
with open(path, mode='rb') as f:
data = pickle.load(f)
return data
# print(pickle_load('funds.pickle'))
funds = pickle_load('funds.pickle') #stock.getFunds()
fee_df= pd.read_csv('../fee.csv', index_col=0)
df = pd.read_csv('../funds.csv', index_col=0)
def getTmpDf(jp_name=False, year=None, possession=False):
df = pd.read_csv('../funds.csv', index_col=0)
df.columns = [c.split('_')[-1] for c in df.columns]
if possession == True:
df = df.loc[:,list(possession_funds.keys())]
if year is not None:
df = df.query('"{}-12-31" < date < "{}-01-01"'.format(year-1, year+1))
if jp_name == True:
df.columns = list(map(lambda x: funds[x]['fund_name'], list(df.columns)))
return df
def buyFunds(target_fund, yaer, timing, amount_plan):
"""
IN : TargetFundDataFrame, BuyingTiming, BuyingAmountsList
OUT : df
"""
profit_df = pd.DataFrame(columns=['month', 'amount', 'amount_sum' , 'quantity',
'purchase_price', 'evaluation_price',
'profit', 'actual_profit'])
profit_df['month'] = [i for i in range(1,13)]
profit_df['amount'] = amount_plan
profit_df['amount_sum'] = profit_df['amount'].cumsum()
profit_df = profit_df.set_index('month')
query = '"{yaer}-{:0=2}-{:0=2}" <= date < "{yaer}-{:0=2}-{:0=2}"'.replace('{yaer}', str(yaer))
for i, amount in enumerate(profit_df['amount']):
if amount == 0:
continue
i = i+1
target_range = pd.concat([target_fund.query(query.format(i, timing, i, timing+10)),
target_fund.query(query.format(i+1, 1, i+1, 1+10))])
purchase_price = target_range.iat[0, 0]
profit_df.loc[i, ['purchase_price']] = target_range.iat[0, 0]
profit_df.loc[i, ['quantity']] = math.ceil(profit_df.loc[i, ['amount']]['amount'] /
(profit_df.loc[i, ['purchase_price']]['purchase_price']/10000))
profit_df.loc[i, ['evaluation_price']] = target_fund.query(query.format(i, 1, i, 31)).iat[-1, 0]
profit_df['quantity'] = profit_df['quantity'].cumsum()
profit_df['profit'] = profit_df['quantity'] * (profit_df['evaluation_price']/10000)
profit_df['actual_profit'] = profit_df['quantity'] * (profit_df['evaluation_price']/10000) - profit_df['amount_sum']
return profit_df
def plotEfficientFrontier(df):
# calc return & risk
mu = expected_returns.mean_historical_return(df)
S = risk_models.sample_cov(df)
# Portfolio optimization
ef = EfficientFrontier(mu, S)
trets = np.arange(0.1, 0.3, 0.01)
tvols = []
for tr in trets:
w = ef.efficient_return(target_return=tr)
w = pd.Series(w).values
v = np.sqrt(np.dot(w.T, np.dot(np.array(S), w)))
tvols += [v]
plt.style.use('ggplot')
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(1, 1, 1)
ax.scatter(tvols, trets, marker='x')
ax.set_xlim([0, 0.15])
ax.set_ylim([0, 0.3])
ax.set_xlabel('Volatility')
ax.set_ylabel('Expected return')
ax.grid(True)
plt.show()
return ef
遷移
df_tmp = getTmpDf(year=2019).sort_index()
data = []
for fund in df_tmp.columns:
data.append(go.Scatter(x=df_tmp.index, y=df_tmp[fund], name=funds[fund.split('_')[-1]]['fund_name']))
plotly.offline.iplot(data, filename='')
基準値遷移
df_tmp = getTmpDf(year=2019).sort_index()
data = []
for fund in df_tmp.columns:
df_tmp[fund] = df_tmp[fund].apply(lambda x: x / df_tmp[fund][0])
data.append(go.Scatter(x=df_tmp.index, y=df_tmp[fund], name=funds[fund.split('_')[-1]]['fund_name']))
plotly.offline.iplot(data, filename='')
偏差値
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
df_tmp = df_tmp.apply(lambda x: x[-1] - x[0])
df_tmp = df_tmp.map(lambda x: (x - df_tmp.mean()) / df_tmp.std()*10+50).sort_values()[::-1]
# paint
colors = ['lightslategray',] * len(list(df_tmp.index))
tmp = list(map(lambda x : funds[x.split('_')[-1]]['fund_name'], list(df_tmp.index)))
# for i in [tmp.index(v) for v in possession_funds_old.values()]:
# colors[i] = 'crimson'
fig = go.Figure(data=[
go.Bar(name='',
x=list(map(lambda x : funds[x.split('_')[-1]]['fund_name'], list(df_tmp.index))),
y=list(df_tmp.values),
marker_color=colors
)])
fig.show()
利益
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
amount_plan = [10000]*12
data = []
profit_df = pd.DataFrame(columns=list(df_tmp.columns))
for fund in profit_df.columns:
profit_df[fund] = buyFunds(df_tmp.loc[:,[fund]], 2019, 1, amount_plan).loc[:,'actual_profit']
data.append(go.Scatter(x=profit_df.index, y=profit_df[fund], name=funds[fund.split('_')[-1]]['fund_name']))
plotly.offline.iplot(data, filename='')
手数料考慮利益
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
amount_plan = [10000]*12
profit_df = pd.DataFrame(columns=list(df_tmp.columns))
for fund in profit_df.columns:
profit_df[fund] = buyFunds(df_tmp.loc[:,[fund]], 2019, 1, amount_plan).loc[:,'profit']
profit_df[fund] = profit_df[fund] - profit_df[fund] * (fee_df['fee'][fund]/100)
profit_df['amount_sum'] = amount_plan
profit_df['amount_sum'] = profit_df['amount_sum'].cumsum()
profit_df = profit_df.apply(lambda x:x-profit_df['amount_sum'])
profit_df = profit_df.drop('amount_sum', axis=1)
data = []
for fund in profit_df.columns:
data.append(go.Scatter(x=profit_df.index, y=profit_df[fund], name=funds[fund.split('_')[-1]]['fund_name']))
plotly.offline.iplot(data, filename='')
# rank
profit_rank = profit_df.loc[12,:].sort_values()[:-11:-1]
layout = plotly.graph_objs.Layout({
'xaxis': {'title': 'TOP10 Profit'},
'yaxis': {'title': 'Profit', 'range': [profit_rank.values[-1]*0.9, profit_rank.values[0]]},
'barmode': 'relative'}
)
fig = go.Figure(data=[go.Bar(
x=[funds[x]['fund_name'] for x in profit_rank.index], y=profit_rank.values,
text= profit_rank.values,
textposition='auto',
)],
layout=layout)
fig.show()
効率的フロンティア
df_tmp = getTmpDf(jp_name=True, year=2019).sort_index()
ef = plotEfficientFrontier(df_tmp)
df_tmp = pd.DataFrame.from_dict(ef.min_volatility(), orient='index',columns=['amount'])
df_tmp.sort_values('amount', ascending=False)[:10]
リターンとリスク
df_tmp = getTmpDf(jp_name=True, year=2019)
mu, S = expected_returns.mean_historical_return(df_tmp), risk_models.sample_cov(df_tmp)
df_tmp = pd.DataFrame(mu, columns=['return'])
df_tmp['risk'] = np.diag(S.values, k=0)
df_tmp = df_tmp.sort_values('return')
fig = go.Figure(data=[
go.Bar(name='return', x=list(df_tmp.index), y=df_tmp['return'].values),
go.Bar(name='risk', x=list(df_tmp.index), y=df_tmp['risk'].values)
])
fig.update_layout(barmode='group')
fig.show()
時系列クラスタリング
to_time_series_dataset(getTmpDf(jp_name=True, year=2019).values)
ks = KShape(n_clusters=10)
rs = ks.fit_predict(to_time_series_dataset(getTmpDf(year=2019).dropna(axis='columns').pct_change().dropna(how='all').T.values))
plot_df = pd.DataFrame(None, columns=getTmpDf(year=2019).dropna(axis='columns').columns).T
plot_df['return'] = getTmpDf(year=2019).pct_change().dropna(how='all').mean()
plot_df['risk'] = getTmpDf(year=2019).pct_change().dropna(how='all').std()
plot_df['name'] = list(map(lambda x: funds[x]['fund_name'], list(plot_df.index)))
group = [[] for i in range(10)]
for c, i in zip(rs, getTmpDf(year=2019).dropna(axis='columns').columns):
group[c].append(i)
plot_df['group'] = None
for i, g in enumerate(group):
plot_df.loc[g, ['group']] = str(i)
import plotly.express as px
fig = px.scatter(plot_df, x='risk', y='return', color='group', hover_data=['name'])
fig.show()